##Connect to Database This Practicum will use a remote instance of MySQL on db4free.net.This database is very easy to set up and there is no configuration or management.
# 1. Library
library(RMySQL)
## Loading required package: DBI
library(DBI)
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
##
## Attaching package: 'RSQLite'
## The following object is masked from 'package:RMySQL':
##
## isIdCurrent
## sqldf will default to using MySQL
library(plyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tinytex)
# 2. Settings
db_user <- 'sarthak55k'
db_password <- 'Sarthak@ubs123'
db_name <- 'db5200practicum1'
db_host <- 'db4free.net'
db_port <- 3306
# 3. Connect to DB
mydb <- dbConnect(MySQL(), user = db_user, password = db_password,
dbname = db_name, host = db_host, port = db_port)
##Create Database
#4A. Create table incidents This table will stores wildlife strike incidents.
DROP TABLE IF EXISTS incidents;
CREATE TABLE incidents(
rid INTEGER NOT NULL PRIMARY KEY,
`dep.date` DATE NULL,
origin INTEGER NULL,
airline INTEGER NULL,
aircraft TEXT NULL,
`flight.phase` SET ("takeoff", "landing", "inflight", "unknown") NOT NULL DEFAULT 'unknown',
altitude INTEGER NULL,
conditions VARCHAR(255) NULL,
warned BOOLEAN NOT NULL DEFAULT FALSE,
CHECK(altitude >= 0)
)
#4B. Create table airports This table will stores airport name, airport code and states.
DROP TABLE IF EXISTS airports;
CREATE TABLE airports(
aid INTEGER NOT NULL PRIMARY KEY AUTO_INCREMENT,
airportName TEXT NOT NULL,
airportCode TEXT NULL,
state TEXT
)
#4C. Setup origin foreign key Given chunk will link the incidents and airports tables via the origin foreign key in incidents to the primary key aid in airports.
ALTER TABLE incidents ADD CONSTRAINT airports_origin_fkey FOREIGN KEY (origin) REFERENCES airports(aid);
#4D. Create lookup table conditions This table will link incidents table with conditions foreign key.
DROP TABLE IF EXISTS conditions;
CREATE TABLE conditions(
cid INTEGER NOT NULL PRIMARY KEY AUTO_INCREMENT,
`condition` VARCHAR(255) NOT NULL,
explanation TEXT NULL,
UNIQUE (`condition`)
)
Make conditions column a foreign key.
ALTER TABLE incidents MODIFY conditions INTEGER;
ALTER TABLE incidents ADD CONSTRAINT conditions_fkey FOREIGN KEY (conditions) REFERENCES conditions(cid);
#4E. Create table airlines This table will stores airlines name, airlines code and flag.
DROP TABLE IF EXISTS airlines;
CREATE TABLE airlines(
eid INTEGER NOT NULL PRIMARY KEY AUTO_INCREMENT,
airlineName TEXT NOT NULL,
airlineCode TEXT NULL,
flag TEXT NULL
)
#4F. Setup origin foreign key Given chunk will link the incidents and airlines tables via the airline foreign key in incidents to the primary key eid in airlines.
ALTER TABLE incidents ADD CONSTRAINT airports_airlines_fkey FOREIGN KEY (airline) REFERENCES airlines(eid);
bds.raw <- read.csv("BirdStrikesData-V2.csv", stringsAsFactors = FALSE)
bds.raw
#Transformaing the flight phase.
unique(bds.raw$flight_phase)
## [1] "Climb" "Landing Roll" "Approach" "Take-off run" "Descent"
## [6] "" "Taxi" "Parked"
x <- c("Parked","Taxi","Take-off run","Climb", "Approach", "Descent", "Landing Roll","")
y <- c("unknown","unknown","takeoff","takeoff","inflight","landing","landing","unknown")
bds.raw$flight_phase <- mapvalues(bds.raw$flight_phase,
from=x,
to=y)
bds.raw$flight_date[bds.raw$flight_date == "" | bds.raw$flight_date == "N/A" | bds.raw$flight_date == "UNKNOWN" | is.na(bds.raw$flight_date) == TRUE ] <- "1/1/2000 0:00"
view1 <- distinct(bds.raw[,c(3,10)],.keep_all=TRUE)
view1$airport[view1$airport == "" | view1$airport == "N/A" | view1$airport == "UNKNOWN" | is.na(view1$airport) == "TRUE"] <- "unknown"
view1$origin[view1$origin == "" | view1$origin == "N/A" | view1$origin == "UNKNOWN" | is.na(view1$origin) == "TRUE"] <- "unknown"
view1
unique(view1$airport[view1$airport == "unknown"])
## [1] "unknown"
query <- paste0("INSERT INTO airports (airportName, state) VALUES ",
paste(sprintf('("%s", "%s")', view1$airport, view1$origin),
collapse = ","))
dbExecute(mydb, query)
## [1] 1141
#updating the view1
view1 <- dbGetQuery(mydb,"SELECT aid, airportName AS airport FROM airports")
view1
view2 <- distinct(bds.raw['airline'],.keep_all=TRUE)
view2$airline[view2$airline == "" | view2$airline == "N/A" | view2$airline == "UNKNOWN" | is.na(view2$airline) == "TRUE"] <- "unknown"
View(view2)
unique(view2$airline[view2$airline == "UNKNOWN"])
## character(0)
query <- paste0("INSERT INTO airlines (airlineName) VALUES ",
paste(sprintf('("%s")', view2$airline),
collapse = ","))
dbExecute(mydb, query)
## [1] 293
#updating the view2
view2 <- dbGetQuery(mydb,"SELECT eid, airlineName AS airline FROM airlines")
view2
view3 <- distinct(bds.raw["sky_conditions"],.keep_all = TRUE)
view3
query <- paste0("INSERT INTO conditions (`condition`) VALUES ",
paste(sprintf("('%s')", view3$sky_conditions),
collapse = ","))
dbExecute(mydb, query)
## [1] 3
#updating the view3
view3 <- dbGetQuery(mydb,"SELECT cid,`condition` AS sky_conditions FROM conditions")
view3
temp <-left_join(left_join(left_join(bds.raw,view1, by = "airport"),view2, by = "airline"),view3, by = "sky_conditions")
temp
#c(1,7,2,11,18,17)
view4 <- temp[,c(1,7,20,21,2,11,18,22,17)]
view4$flight_date <- gsub('-', '/', view4$flight_date)
view4$flight_date <- as.Date(view4$flight_date, "%m/%d/%y")
#view2$airlines[view1$airlines == ""] <- "unknown"
view4
#view4\(rid,view4\)flight_date,view4\(aid,view4\)eid,view4\(aircraft,view4\)flight_phase,view4\(cid,view4\)pilot_warned_flag,
query <- paste0("INSERT INTO incidents (rid, `dep.date`, origin, airline, aircraft, `flight.phase`, altitude, `conditions`, warned) VALUES ",
paste(sprintf("('%s', NULLIF('%s', 'NA'), NULLIF('%d', 'NA'), NULLIF('%d', 'NA'),'%s', '%s', NULLIF('%s', 'NA'), NULLIF('%d', 'NA'), '%d')",
view4$rid, view4$flight_date, view4$aid, view4$eid, view4$aircraft, view4$flight_phase, as.numeric(gsub(",", "", view4$altitude_ft)), view4$cid, ifelse(view4$pilot_warned_flag == 'Y', TRUE, FALSE)),
collapse = ","))
dbExecute(mydb, query)
## [1] 25558
select * from incidents
#DELETE FROM incidents
| rid | dep.date | origin | airline | aircraft | flight.phase | altitude | conditions | warned |
|---|---|---|---|---|---|---|---|---|
| 1195 | 2020-11-13 | 37 | 21 | Airplane | inflight | 2000 | 3 | 1 |
| 3019 | 2020-10-10 | 717 | 21 | Airplane | takeoff | 400 | 1 | 1 |
| 3500 | 2020-05-15 | 37 | 21 | Airplane | inflight | 1000 | 1 | 1 |
| 3504 | 2020-05-23 | 37 | 21 | Airplane | inflight | 1800 | 1 | 1 |
| 3597 | 2020-04-18 | 123 | 21 | Airplane | inflight | 200 | 2 | 1 |
| 4064 | 2020-04-06 | 37 | 21 | Airplane | inflight | 1000 | 1 | 1 |
| 4074 | 2020-07-15 | 180 | 21 | Airplane | takeoff | 0 | 1 | 1 |
| 4076 | 2020-07-15 | 37 | 21 | Airplane | takeoff | 500 | 2 | 1 |
| 4090 | 2020-07-02 | 114 | 21 | Airplane | takeoff | 50 | 2 | 1 |
| 4091 | 2020-07-07 | 114 | 21 | Airplane | takeoff | 0 | 2 | 1 |
#8 limit 10 is remaining
query <- "SELECT ar.state, count(*) NoOfIncidents FROM incidents as i LEFT JOIN airports as ar ON i.origin = ar.aid group by ar.state order by NoOfIncidents DESC LIMIT 10"
dbGetQuery(mydb,query)
#9
SELECT ai.airlineName, count(*) NoOfIncidents
FROM incidents as i
LEFT JOIN
airlines as ai
ON i.airline = ai.eid
GROUP BY ai.airlineName
HAVING NoOfIncidents >
(
SELECT avg(NoOfIncidents)
FROM (
SELECT ai.airlineName, count(*) NoOfIncidents
FROM incidents as i
LEFT JOIN
airlines as ai
ON i.airline = ai.eid
GROUP BY ai.airlineName
) as av
)
ORDER BY NoOfIncidents DESC
| airlineName | NoOfIncidents |
|---|---|
| SOUTHWEST AIRLINES | 4628 |
| BUSINESS | 3074 |
| AMERICAN AIRLINES | 2058 |
| DELTA AIR LINES | 1349 |
| AMERICAN EAGLE AIRLINES | 932 |
| SKYWEST AIRLINES | 891 |
| US AIRWAYS* | 797 |
| JETBLUE AIRWAYS | 708 |
| UPS AIRLINES | 590 |
| US AIRWAYS | 540 |
#10 #in jan how many incident happend for a particular flight phase considering all years. #group by on month, flight phase #count(*)
SELECT MONTH(i.`dep.date`) as Month, i.`flight.phase`, count(*) NoOfIncidents
FROM incidents as i
GROUP BY MONTH(i.`dep.date`),i.`flight.phase`
ORDER BY Month,NoOfIncidents;
| Month | flight.phase | NoOfIncidents |
|---|---|---|
| 1 | unknown | 133 |
| 1 | landing | 202 |
| 1 | takeoff | 357 |
| 1 | inflight | 374 |
| 2 | unknown | 2 |
| 2 | landing | 176 |
| 2 | takeoff | 293 |
| 2 | inflight | 301 |
| 3 | unknown | 9 |
| 3 | landing | 245 |
query <- "SELECT MONTH(i.`dep.date`) as Month, i.`flight.phase`, count(*) NoOfIncidents FROM incidents as i GROUP BY MONTH(i.`dep.date`),i.`flight.phase` ORDER BY Month;"
t <- dbGetQuery(mydb,query)
ggplot(t, aes(x = Month, y = NoOfIncidents)) +
geom_point()
CREATE PROCEDURE SPAddNewIncident(
nRid INTEGER,
nDepDate DATE,
nAirport TEXT,
nOrigin TEXT,
nAirline TEXT,
nAircraft TEXT,
nPhase TEXT,
nConditions TEXT,
nAltitude INTEGER,
nWarned BOOLEAN
)
BEGIN
declare naid int default 0;
declare neid int default 0;
declare ncid int default 0;
IF (ISNULL(nRid))
THEN
SIGNAL SQLSTATE '42601'
SET MESSAGE_TEXT = 'rid can not be null!';
END IF;
IF (nAltitude < 0)
THEN
SIGNAL SQLSTATE '42601'
SET MESSAGE_TEXT = 'altitude can not be negative!';
END IF;
IF (nPhase not in ("takeoff", "landing", "inflight", "unknown"))
THEN
SIGNAL SQLSTATE '42601'
SET MESSAGE_TEXT = 'phase is incorrect!';
END IF;
IF (nAltitude >= 0)
THEN
SET naid = (SELECT aid FROM airports WHERE airportName = nAirport AND state = nOrigin);
IF (ISNULL(naid))
THEN
INSERT INTO airports (airportName, state) VALUES (nAirport, nOrigin);
SET naid = (SELECT aid FROM airports WHERE airportName = nAirport AND state = nOrigin);
END IF;
SET neid = (SELECT eid FROM airlines WHERE airlineName = nAirline);
IF (ISNULL(neid))
THEN
INSERT INTO airlines (airlineName) VALUES (nAirline);
SET neid = (SELECT eid FROM airlines WHERE airlineName = nAirline);
END IF;
SET ncid = (SELECT cid FROM conditions WHERE `condition` = nConditions);
INSERT INTO incidents (rid, `dep.date`, origin, airline, aircraft, `flight.phase`, altitude, `conditions`, warned)
VALUES
(nRid, nDepDate, naid, neid, nAircraft, nPhase, nAltitude, ncid, nWarned);
END IF;
END
CALL SPAddNewIncident(11, '1997-06-10','LAGUARDIA NY', 'NY', 'AMERICAN AIRLINES', 'Airplane', 'unknown', 'No Cloud', 50, TRUE);
DROP PROCEDURE SPAddNewIncident;
# Disconnect database connection
dbDisconnect(mydb)
## [1] TRUE